
/* downloaded from Census, Summary File 1, 2000 */

**These are very large datasets from the 2000 Census Summary File 1. They can be downloaded directly from the Census Bureau's website. We provide the processed files. The code that processes the raw files is kept below but commented out.


* Switch the working directory to the folder held in the global macro datafolder.
* The relative file name census_2000_sf1.dta used further down is resolved against it.
* The macro is not defined in this file - presumably it is set by a calling do-file (TODO confirm).
cd "$datafolder"

/**Summary File 1 2000 (100 percent, to block level, population, ageXsex, race/ethnicity)**

foreach state in ak al ar az ca co ct dc de fl ga hi ia id il in ks ky la ma md mi mn mo ms mt me nc nd nh nj nm nv ny ne oh ok or pa ri sc sd tn tx ut va vt wa wi wv wy {

insheet using "`state'00002.uf1", comma clear
rename v1 fileid 
rename v2 stusab
rename v3 charitr
rename v5 logrecno

rename v180 total_male
rename v201 total_female

forvalues n=0(1)19{
local m=`n'+181
rename v`m' male_age`n'
}

forvalues n=0(1)19{
local m=`n'+202
rename v`m' female_age`n'
}

egen male=rowtotal(male_age0-male_age19)
egen female=rowtotal(female_age0-female_age19)
assert male==total_male
assert female==total_female
drop male female total_male total_female


drop v*


save "`state'sf100_2.dta", replace
}


foreach state in ak al ar az ca co ct dc de fl ga hi ia id il in ks ky la ma md mi mn mo ms mt me nc nd nh nj nm nv ny ne oh ok or pa ri sc sd tn tx ut va vt wa wi wv wy {

**geo info**

infix str fileid1 1-6 str stusab1 7-8 sumlev 9-11 charitr1 14-16 logrecno 19-25 str zipcode 161-165 using "`state'geo.uf1", clear


merge 1:1 logrecno using "`state'sf100_2.dta"
assert _merge==3
drop _merge

foreach n in fileid stusab charitr {
assert `n'1==`n'
drop `n'
}


**keep only zipcodes
keep if sumlev==871
drop stusab sumlev fileid  logrecno charitr

forvalues n=0(1)19{
gen pop_age`n'=male_age`n'+female_age`n'
}

keep pop* zipcode 

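**convert zipcode to numeric. With the force option, codes that contain non-digit characters become missing. Those rows are NOT dropped by this step.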
destring(zipcode), replace force


save "`state'sf100.dta", replace
}




**COMBINE 2000 SF1
use "aksf100.dta", clear
foreach state in  al ar az ca co ct dc de fl ga hi ia id il in ks ky la ma md mi mn mo ms mt me nc nd nh nj nm nv ny ne oh ok or pa ri sc sd tn tx ut va vt wa wi wv wy {
append using  "`state'sf100.dta"
}

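**NOTE(review): no step above creates a variable named year (the per-state files retain only the pop_age variables and zipcode), so this keep stops with a variable-not-found error as written. Generate year first or remove it from the list before re-enabling this code.
keep zipcode pop_age15-pop_age18 year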

save "census_2000_sf1.dta" */


* Build the ZIP-level population file from the processed 2000 Census SF1 extract.
* Input:  census_2000_sf1.dta in the current working directory
* Output: zipcode_population_census00.dta in the folder held in the global macro datafolder2
use "census_2000_sf1.dta", clear

* Combined count for the single-year ages 15, 16 and 17.
* A missing value in any of the three components makes the sum missing.
generate psat_pop = pop_age15 + pop_age16 + pop_age17
label variable psat_pop "Pop age 15-17"

* Combined count for the single-year ages 16, 17 and 18 (same missing-value behaviour).
generate sat_pop = pop_age16 + pop_age17 + pop_age18
label variable sat_pop "Pop age 16-18"

* Copy of zipcode under the shorter name zip. The original zipcode column is kept too.
generate zip = zipcode

* Overwrite any earlier version of the output file.
save "$datafolder2/zipcode_population_census00.dta", replace
